In [5]:
from os.path import getsize
from glob import glob
from time import time
import netCDF4
def do_raw(chunks):
    """Read raw byte ranges from the file named by the global `fn`.

    chunks -- iterable of (offset, n_bytes) pairs; for each pair the file
              is positioned at `offset` and `n_bytes` bytes are read.

    The bytes that are read are discarded and nothing is returned; the
    function exists only so that the reads themselves can be timed.
    """
    with open(fn, 'rb') as raw_file:
        for offset, n_bytes in chunks:
            raw_file.seek(offset)
            raw_file.read(n_bytes)
def do_netcdf(chunks):
    """Slice variable `name` of the netCDF file `fn` once per chunk.

    chunks -- iterable of ((start, stop), depth) entries; each entry
              indexes the variable as [start:stop, :depth, ...].

    `fn` and `name` are module-level globals.  The sliced data is
    discarded and nothing is returned; the function exists only so that
    the reads themselves can be timed.
    """
    with netCDF4.Dataset(fn) as dataset:
        variable = dataset.variables[name]
        for (start, stop), depth in chunks:
            # Bare indexing expression: the result is intentionally dropped.
            variable[start:stop, :depth, ...]
name = 'ua'
fn = glob('/opt/data/IPSL-CM5A-MR/rcp85/6hr/atmos/6hrLev/r1i1p1/latest/%s/*.nc' % name)[0]
print fn
In [43]:
method = (
('raw', 'contiguous', 'few'),
('raw', 'contiguous', 'many'),
('raw', 'spread', None),
('netcdf', 'contiguous', None),
('netcdf', 'spread', None)
)[4]
with netCDF4.Dataset(fn) as d:
shape = d.variables[name].shape
sz = getsize(fn)
n_cells = reduce(int.__mul__, shape)
cell_sz = float(sz) / n_cells
blocks = shape[0] / shape[1]
slcs = shape[1] * blocks
slc_cells = shape[2] * shape[3]
slc_sz = int(round(slc_cells * cell_sz))
read_ts = 1000
read_hs = 25
block_cells = shape[1] * slc_cells
block_sz = int(round(block_cells * cell_sz))
if method[0] == 'raw':
do = do_raw
if method[1] == 'contiguous':
if method[2] == 'few':
chunks = [(i * block_sz, block_sz) for i in xrange(blocks)]
else:
chunks = [(i * block_sz, slc_sz) for i in xrange(slcs)]
else:
chunks = [(i * block_sz, slc_sz) for i in xrange(slcs)]
print '# bytes:', sum(zip(*chunks)[1])
else:
do = do_netcdf
if method[1] == 'contiguous':
chunks = [((read_hs * i, read_hs * (i + 1)), shape[1]) for i in xrange(blocks / read_hs)]
chunks.append(((chunks[-1][0][1], chunks[-1][0][1] + blocks % read_hs), shape[1]))
else:
chunks = [((read_ts * i, read_ts * (i + 1)), 1) for i in xrange(slcs / read_ts)]
chunks.append(((chunks[-1][0][1], chunks[-1][0][1] + slcs % read_ts), 1))
print '# numbers:', sum((t1 - t0) * h for (t0, t1), h in chunks) * slc_cells * cell_sz
print '# chunks:', len(chunks)
t0 = time()
do(chunks)
print time() - t0
In [43]:
# Recorded output of the benchmark cell: wall-clock seconds (`time() - t0`).
# Presumably three repeated runs per access pattern -- TODO confirm.
# raw, contiguous, few
6.0811650753, 5.89195919037, 6.07426786423
# raw, contiguous, many
# NOTE(review): these timings are nearly identical to 'raw, spread' below;
# confirm that the two runs really used different chunk lists.
76.244145155, 77.6571240425, 78.8810811043
# raw, spread (many)
76.6748468876, 76.8060109615, 76.8620369434
# netcdf, contiguous
12.133687973, 12.1888580322, 12.0239961147
# netcdf, spread
113.553792953, 110.792984962, 108.883361101